from pymongo import MongoClient

# Remove duplicate documents from the Advanced.Test collection, treating the
# `href` field as the unique key. The first document seen for each href is
# kept; every later document with the same href is deleted.

# Open the collection.
client = MongoClient("localhost", 27017)
db = client.Advanced
collection = db.Test

# href values already encountered. A set gives O(1) membership tests, so the
# whole pass is linear in the number of documents.
# NOTE(review): assumes href values are hashable (e.g. strings) -- confirm.
patents = set()
count = 0  # not used in the visible code; kept in case later code reads it

# Only `_id` and `href` are needed, so project away all other fields.
for item in collection.find({}, {"href": 1}):
    if "href" not in item:
        # Documents without an href cannot be compared; leave them untouched
        # rather than aborting the run with a KeyError.
        continue

    href = item["href"]
    if href not in patents:
        patents.add(href)  # first occurrence: remember it and keep the document
    else:
        # Duplicate: delete exactly this document by its primary key instead
        # of using the whole document as the filter.
        collection.delete_one({"_id": item["_id"]})